################################
## Script: 07_gpt_annotator_finetune_bioweapons.R
## Purpose: This code runs the potential matches
## from cross encoder through the fine tuned GPT annotator. 
## Data In:
## data/gpt4o_toannotate.rds
## Data Out:
## all files with the form
## data/gpt4o_finetune_annotations/annotate_[num].rds
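## each file holds one batch of candidate pairs with the fine-tuned
## GPT-4o labels added as the columns gpt4o_fine_tune_same_subject
## and gpt4o_fine_tune_same_claim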
## Notes:
## Need to run with an array job, with arrays 1-100
## Also need openai credentials

library(tidyverse)
library(openai)
library(jsonlite)

#########################################
#### Read Files ######################
#########################################

## credentials - set 
#Sys.setenv(
 # OPENAI_API_KEY = ''
#)


## Array task id supplied by SLURM; it selects which of the 100 batches
## this job annotates and is reused in the output file name.
slurm_arrayid <- Sys.getenv('SLURM_ARRAY_TASK_ID')
print(slurm_arrayid)

## Fail early with a clear message when the script is not run as an
## array job (id unset) or the id falls outside 1-100, instead of
## failing later at the batch lookup or inside the annotation loops.
n_batches <- 100L
task_id <- suppressWarnings(as.integer(slurm_arrayid))
if (is.na(task_id) || task_id < 1L || task_id > n_batches) {
  stop("SLURM_ARRAY_TASK_ID must be an integer between 1 and ", n_batches,
       "; got '", slurm_arrayid, "'", call. = FALSE)
}

## candidate pairs
out <- readRDS("data/gpt4o_toannotate.rds")

## Split into 100 batches and keep the one that belongs to this array
## task. Rows are dealt round-robin (row i goes to batch
## ((i - 1) %% 100) + 1), which is the assignment split(out, 1:100)
## produced through recycling, but without the recycling warning.
## The explicit factor levels keep all 100 batches present (possibly
## with zero rows) even when there are fewer than 100 rows.
batch_id <- factor(rep_len(seq_len(n_batches), nrow(out)),
                   levels = seq_len(n_batches))
out <- split(out, batch_id)

out <- out[[task_id]]

######################################
### Prompts  #########################
######################################

## System prompt passed to the subject model in api_fct_subject().
## The literal spans two source lines, so the string sent to the model
## contains the embedded line break.
prompt_compare_subject <- "You will be provided with the lists of the people, places, objects, and events discussed in two paragraphs. Based on these
lists, do the two paragraphs discuss the vast majority of the same people, places, objects, and events?"
## System prompt passed to the claim model in api_fct_claim().
## As above, the line break inside the literal is part of the string.
prompt_compare_claim <- "You will be provided with the lists of descriptive, normative, conceptual, and causal claims discussed in two paragraphs. 
Based on these lists, do the two paragraphs discuss the vast majority of the same claims?" 


## Ask the fine-tuned claim model whether two paragraphs make the same
## claims.
##   x: user message (the two claim lists plus the YES/NO instruction).
## Returns the content of the model's reply, or NA (character) when the
## request raises an error.
## On error the function sleeps for `sl` seconds (`sl` is a global set
## by the calling script) and then lengthens that global delay by 0.1 s,
## so repeated failures back off progressively.
api_fct_claim <- function(x){
  tryCatch(
    expr = {
      m <- create_chat_completion(model = "ft:gpt-4o-2024-08-06:personal::AcobyL9Q",
                                  messages = list(list(role = "system",
                                                       content = prompt_compare_claim),
                                                  list(role = "user",
                                                       content = x)),
                                  temperature = 0)
      m$choices$message.content
    },
    error = function(e){
      ## report the underlying cause instead of hiding it
      message('Caught an error! ', conditionMessage(e))
      Sys.sleep(sl)
      ## `<<-` is deliberate: a plain `<-` only creates a copy local to
      ## this handler, which is discarded on return, so the delay would
      ## never actually grow.
      sl <<- sl + .1
      NA_character_
    }
  )
}

## Ask the fine-tuned subject model whether two paragraphs discuss the
## same people, places, objects, and events.
##   x: user message (the two subject lists plus the YES/NO instruction).
## Returns the content of the model's reply, or NA (character) when the
## request raises an error.
## On error the function sleeps for `sl` seconds (`sl` is a global set
## by the calling script) and then lengthens that global delay by 0.1 s,
## so repeated failures back off progressively.
api_fct_subject <- function(x){
  tryCatch(
    expr = {
      m <- create_chat_completion(model = "ft:gpt-4o-2024-08-06:personal::Acor6lGL",
                                  messages = list(list(role = "system",
                                                       content = prompt_compare_subject),
                                                  list(role = "user",
                                                       content = x)),
                                  temperature = 0)
      m$choices$message.content
    },
    error = function(e){
      ## report the underlying cause instead of hiding it
      message('Caught an error! ', conditionMessage(e))
      Sys.sleep(sl)
      ## `<<-` is deliberate: a plain `<-` only creates a copy local to
      ## this handler, which is discarded on return, so the delay would
      ## never actually grow.
      sl <<- sl + .1
      NA_character_
    }
  )
}

######################################
### Run Completions ##################
######################################

## Subject pass: ask the subject model about every candidate pair in
## this batch and store its reply in gpt4o_fine_tune_same_subject.

## setting initial sleep parameter for api_fct
sl <- 1
## one slot per candidate pair, allocated up front
annotation_subject <- vector("list", nrow(out))
## seq_len() gives an empty loop for a zero-row batch, whereas
## 1:nrow(out) would iterate over 1 and 0
for(j in seq_len(nrow(out))){
  prompt_s <- paste0("\n **Paragraph 1**: ", out$ego_subject[j],
                     "\n **Paragraph 2**: ", out$alter_subject[j],
                     "\n", "**Your label (Respond only with 'YES' or 'NO')**:")
  label <- api_fct_subject(prompt_s)
  ## keep exactly one label per row: a NULL reply would delete the list
  ## slot and a multi-valued reply would lengthen the unlist() result,
  ## either of which breaks the column assignment below
  if(length(label) != 1L){
    label <- NA_character_
  }
  annotation_subject[[j]] <- label
  print(j)
}
## as.character() keeps the column type stable (character) even when
## every call failed or the batch is empty
out$gpt4o_fine_tune_same_subject <- as.character(unlist(annotation_subject))

## Claim pass: only pairs the subject model labelled "YES" are sent to
## the claim model; every other pair gets NA in
## gpt4o_fine_tune_same_claim.

## setting initial sleep parameter for api_fct
sl <- 1
## one slot per candidate pair, allocated up front
annotation_claim <- vector("list", nrow(out))
## seq_len() gives an empty loop for a zero-row batch, whereas
## 1:nrow(out) would iterate over 1 and 0
for(j in seq_len(nrow(out))){
  same_subject <- out$gpt4o_fine_tune_same_subject[j]
  ## isTRUE() is FALSE for NA, so pairs whose subject call failed are
  ## skipped without relying on `NA & FALSE` evaluating to FALSE
  if(isTRUE(same_subject == "YES")){

    prompt_c <- paste0("\n **Paragraph 1**: ", out$ego_claim[j],
                       "\n **Paragraph 2**: ", out$alter_claim[j],
                       "\n", "**Your label (Respond only with 'YES' or 'NO')**:")
    label <- api_fct_claim(prompt_c)
    ## keep exactly one label per row (see the column assignment below)
    if(length(label) != 1L){
      label <- NA_character_
    }
    annotation_claim[[j]] <- label

  } else{
    annotation_claim[[j]] <- NA_character_
  }
  print(j)
}
## as.character() keeps the column type stable (character) even when
## no pair reached the claim model or the batch is empty
out$gpt4o_fine_tune_same_claim <- as.character(unlist(annotation_claim))


## Write this batch, with both annotation columns, to its own file
## named after the array task id.
filename <- paste0("data/gpt4o_finetune_annotations/annotate_",
                   slurm_arrayid, ".rds")

## Create the output folder if it is missing, so a finished batch is not
## lost to a failed write after all API calls have been made.
## showWarnings = FALSE silences the warning when the folder exists.
dir.create(dirname(filename), recursive = TRUE, showWarnings = FALSE)

saveRDS(out, filename)

